//Programm to compute the micro-geographic IO-linkages which we will use
//in the papers with Mark and Theo
//
//This version: 20/06/2013
//Updates:
//
//

#include <iostream>
#include <gsl/gsl_sf_bessel.h>
#include <gsl/gsl_errno.h>
#include <gsl/gsl_math.h>
#include <gsl/gsl_poly.h>
#include <gsl/gsl_complex_math.h>
#include <gsl/gsl_sf.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_statistics_double.h>
#include <gsl/gsl_sort.h>

#include <vector>

//UTILITY MACROS


#define DEG2RAD(x) (x * M_PI) / 180.0;
#define EARTH_RADIUS 6378.137

using namespace std;

enum LOGIC {FALSE = 0 , TRUE = 1};



//UTILITY FUNCTIONS


//Allocate array


double** alloc_large_array(long rows, long cols)
{
    double **array_to_alloc = new double*[rows];
    
    for (long i = 0; i < rows; i++)
    {
        array_to_alloc[i] = new double[cols];
        
        if (array_to_alloc[i] == NULL)
        {
            fprintf(stderr, "ERROR - Memory could not be allocated, aborting\n");
            return(NULL);
        }
    }
    return(array_to_alloc);
}


double** free_large_array(long rows, double **ptr)
{
    for (long i = 0; i < rows; i++)
    {
        free(ptr[i]);
    }
    
    free(ptr);
    
    return(NULL);
}


void read_industry_map(char *ifile_name, long **industry_map)
{
    FILE *ifile_map = fopen(ifile_name, "r");
    
    for (int i = 0; i < 259; i++)
    {
        fscanf(ifile_map, "%ld %ld", &industry_map[i][0], &industry_map[i][1]);
    }
    
    fclose(ifile_map);
}

double map(long **industry_map, double tomap)
{
    //Convert the industry codes into cpp table codes
    for (int i = 0; i < 259; i++)
    {
        if (industry_map[i][0] == (long) tomap)
        {
            return((double) industry_map[i][1]);
        }
    }
    
    return(0.0);
}


//MAIN PROCEDURE


int main (int argc, char *const argv[])
{
    //Parameters
    char ofile_out[1024];	//file used for output
    
    int t, tt, geosize, num_firms_naics6, num_firms_naics6_back, firm_naics, smoothing, counterfactual, skip_num, skip_flag;
    
    long industry6, industry6_map;
    
    clock_t start, finish;
    
    double ilat  = 0, ilon  = 0, jlat  = 0, jlon  = 0, distance = 0, ind, temp;
    
    //Matrices
    double iomatrix_R[260][260]; //IO matrix (raw data)
    double iomatrix_O[260][260]; //IO links output (supplier standardized)
    double iomatrix_I[260][260]; //IO links input (client standardized)
    
    //Allocate space to contain the industry map
    long **industry_map = new long*[259]; //mapping from NAICS6 codes to cpp tables
    
    for (int i = 0; i < 259; i++)
    {
        industry_map[i] = new long[2];
        
        if (industry_map[i] == NULL)
        {
            fprintf(stderr, "ERROR - Memory could not be allocated, aborting\n");
            return(NULL);
        }
    }
    
    int top_i[260], top_o[260];  //top IO-links of the industry
    
    
    //Read in the command line parameters
    
    start = clock();
    
    if (argc < 7)
    {
        fprintf(stderr, "%s: ERROR - Not all arguments correctly specified.\n", argv[0]);
        return(1);
    }
    
    
    //****************************************
    //STEP [1]: Get the command line arguments
    //****************************************
    
    //The universe of the firms to be used is passed to the program as the first argument argv[1]
    
    //Structure of the file is as follows (9 variables):
    //Line 1    : Number of geographical units
    //Line x > 1: locID latitude longitude primarynaics employment
    
    FILE *ifile_geodata;
    double **geospace, **outspace;
    
    if ((ifile_geodata = fopen(argv[1], "r")) == NULL)
    {
        fprintf(stderr, "%s: ERROR - File %s not found, aborting.\n", argv[0], argv[1]);
        return(1);
    }
    
    //The input-output table (raw) is passed as the second argument argv[2]
    
    //Structure of the file is as follows (259 x 259 matrix)
    //Element (i,j) = supply of industry i to industry j
    
    FILE *ifile_iodata;
    
    if ((ifile_iodata = fopen(argv[2], "r")) == NULL)
    {
        fprintf(stderr, "%s: ERROR - File %s not found, aborting.\n", argv[0], argv[2]);
        return(1);
    }
    
    //Output is written to the file specified as the third argument argv[3]
    strcpy(ofile_out, argv[3]);
    
    //Determine the sector to be processed (counterfactual only) from argv[4]
    if ((industry6 = atoi(argv[4])) < 310000 | (industry6 = atoi(argv[4])) > 339999)
    {
        fprintf(stderr, "%s: ERROR - invalid NAICS6 manufacturing industry code, aborting.\n", argv[0]);
        return(2);
    }
    
    //The number of nearest firms across which to compute the linkage measure is the fourth argument argv[5]
    if ((smoothing = atoi(argv[5])) < 1)
    {
        fprintf(stderr, "%s: ERROR - invalid number of firms for smoothing, aborting.\n", argv[0]);
        return(2);
    }
    
    //The last parameter is either 0 for actual linkages, or 1 for counterfactual linkages
    counterfactual = atoi(argv[6]);
    
    
    //**************************************
    //STEP [2]: Read in and prepare the data
    //**************************************
    
    
    //Read in the industry mapping
    read_industry_map("./industry_map.txt", industry_map);
    
    //Read in data for the universe of firm locations (*geodata file)
    
    fscanf(ifile_geodata, "%d", &geosize);
    
    geospace = alloc_large_array(geosize, 6);
    outspace = alloc_large_array(geosize, 3);
    
    fprintf(stdout, "%s: Processing the file with the firms...\n", argv[0], geosize);
    
    fprintf(stdout, "%s: Number of firms to be processed = %d\n", argv[0], geosize);
    fprintf(stdout, "%s: Industry to be processed (counterfactual only) = %ld\n", argv[0], (long) industry6);
    fprintf(stdout, "%s: Smoothing across %d nearest firms\n", argv[0], smoothing);
    
    if (counterfactual == TRUE)
    {
        fprintf(stdout, "%s: Running counterfactual computations for industry %ld\n", argv[0], (long) industry6);
    }
    else
    {
        fprintf(stdout, "%s: Computing actual IO-links for all firms\n", argv[0]);
    }
    
    
    t = 0;
    
    
    while (fscanf(ifile_geodata, "%lf %lf %lf %lf %lf", &geospace[t][0], &geospace[t][1], &geospace[t][2], &geospace[t][3], &geospace[t][4]) != EOF)
    {
        //Convert the latitude and the longitude into radians for distance computations
        geospace[t][1] = DEG2RAD(geospace[t][1]);
        geospace[t][2] = DEG2RAD(geospace[t][2]);
        
        //Create a new industry identifier that corresponds to i=0,1,...,258 that is the index in the cpp IO-table
        geospace[t][5] = map(industry_map, geospace[t][3]);
        
        t++;
    }
    
    //Initialize the output space (outspace)
    
    for (long i = 0; i < geosize; i++)
    {
        outspace[i][0] = geospace[i][0];
        outspace[i][1] = 0.0;
        outspace[i][2] = 0.0;
        //outspace[i][3] = 0.0;
        //outspace[i][4] = 0.0;
        //outspace[i][5] = 0.0;
        //outspace[i][6] = 0.0;
    }
    
    fclose(ifile_geodata);
    
    //Read in the raw input-output matrix (259x259 matrix, W-level matrix converted to NAICS6 level)
    
    for (int i = 0; i < 259; i++)
    {
        for (int j = 0; j < 259; j++)
        {
            fscanf(ifile_iodata, "%lf", &iomatrix_R[i][j]);
            
            //Set own-industry elements to zero, we want to exclude them
            if (i == j) { iomatrix_R[i][j] = 0; }
        }
    }
    
    fclose(ifile_iodata);
    
    
    //Dynamically allocate the matrix holding the minimum distances for each industry, using the smoothing parameter
    
    double **mindist;
    
    mindist = alloc_large_array(259, smoothing);
    
    
    //**************************************************************
    //STEP [3]: Standardize the input-output matrix for computations
    //**************************************************************
    
    //Construct the IO matrix (output linkages (i.e., supplier) = row standardized)
    
    for (int i = 0; i < 259; i++)
    {
        temp = 0.0, ind = 0.0;
        
        //Make the row sum of the matrix
        for (int j = 0; j < 259; j++)
        {
            temp += iomatrix_R[i][j];
        }
        
        //Row standardize the matrix
        for (int j = 0; j < 259; j++)
        {
            iomatrix_O[i][j] = iomatrix_R[i][j] / temp;
        }
        
        //Compute the index of the industry with the strongest O-link
        for (int j = 0; j < 259; j++)
        {
            if (iomatrix_O[i][j] > ind)
            {
                ind = iomatrix_O[i][j];
                top_o[i] = j; //save index of the largest client industry
            }
        }
        
    }
    
    //Construct the IO matrix (input linkages (i.e., client) = col standardized)
    
    for (int j = 0; j < 259; j++)
    {
        temp = 0; ind = 0.0;
        
        //Make the col sum of the matrix
        for (int i = 0; i < 259; i++)
        {
            temp += iomatrix_R[i][j];
        }
        
        //Col standardize the matrix
        for (int i = 0; i < 259; i++)
        {
            iomatrix_I[i][j] = iomatrix_R[i][j] / temp;
        }
        
        //Compute the index of the industry with the strongest I-link
        for (int i = 0; i < 259; i++)
        {
            if (iomatrix_I[i][j] > ind)
            {
                ind = iomatrix_I[i][j];
                top_i[j] = i; //save index of the largest supplier industry
            }
        }
    }
    
    industry6_map = map(industry_map, industry6);
    
    //fprintf(stdout, "%s: Top input = %d, Top output = %d\n", argv[0], top_i[(int) industry6_map], top_o[(int) industry6_map]);
    
    //Cross-check that elements sum to one
    
    
    /*for (int i = 0; i < 259; i++)
     {
     temp = 0.0;
     
     for (int j = 0; j < 259; j++)
     {
     temp += iomatrix_O[i][j];
     }
     
     printf("%lf ", temp);
     }
     
     for (int j = 0; j < 259; j++)
     {
     temp = 0.0;
     
     for (int i = 0; i < 259; i++)
     {
     temp += iomatrix_I[i][j];
     }
     
     printf("%lf ", temp);
     }*/
    
    //for (int j = 0; j < geosize; j++)
    //{
    //	printf("%lf %lf %lf\n", geospace[j][0], geospace[j][3], geospace[j][5]);
    //}
    
    
    
    
    //*****************************************************
    //STEP [4]: Compute the different input-output linkages
    //*****************************************************
    
    double temp_dist[geosize][2];
    double avg_mindist[259];
    
    
    fprintf(stdout, "%s: Starting to compute the IO-linkages...\n", argv[0]);
    
    FILE *ofile;
    
    if ((ofile = fopen(ofile_out, "w")) == NULL)
    {
        fprintf(stderr, "%s: ERROR - File %s could not be created, aborting.\n", argv[0], argv[3]);
        return(1);
    }
    
    for (long i = 0; i < geosize; i++)
    {
        
        ilat = geospace[i][1];
        ilon = geospace[i][2];
        
        if (counterfactual == FALSE)
        {
            //Use the NAICS code of the firm really located in i (true IO links)
            firm_naics = map(industry_map, geospace[i][3]);
        }
        else
        {
            //Use the NAICS code that is specified in the command line for all locations (counterfactual IO links)
            firm_naics = map(industry_map, industry6);
        }
        
        
        //for (int j = 0; j < 259; j++)
        //{
        //	avg_mindist[j] = 0.0;
        //}
        
        for (int j = 0; j < geosize; j++)
        {
            //Exclude the firm itself. In case of counterfactual location, replace firm at that location with firm in chosen naics6 sector
            if (i != j)
            {
                //supplier = (long) geospace[j][5];
                
                jlat = geospace[j][1];
                jlon = geospace[j][2];
                
                if ((ilat != jlat) || (ilon != jlon))
                {
                    distance = (acos(sin(ilat) * sin(jlat) + cos(fabs(ilon - jlon)) * cos(ilat) * cos(jlat)) * EARTH_RADIUS);
                }
                else
                {
                    distance = 0.0;
                }
            }
            else
            {
                distance = 99999.99;
            }
            
            
            temp_dist[j][0] = distance;
            temp_dist[j][1] = (long) geospace[j][5];
        }
        
        //Compute the minimum distance (average across 'smoothing' firms in each industry)
        
        //Loop over all sectors
        for (int k = 0; k < 259; k++)
        {
            num_firms_naics6 = 0;
            
            //Count number of firms in the industry
            for (int j = 0; j < geosize; j++)
            {
                if ((int)temp_dist[j][1] == k)
                {
                    num_firms_naics6++;
                }
            }
            
            skip_num = 0;
            
            if (num_firms_naics6 < smoothing)
            {
                num_firms_naics6_back = num_firms_naics6;
                num_firms_naics6 = smoothing;
                skip_num = 1;
            }
            
            //Create vector of correct size
            std::vector<double> temp_vect (num_firms_naics6, 0.0);
            
            tt = 0;
            
            //Write distances in the industry into the vector
            for (int j = 0; j < geosize; j++)
            {
                if ((int)temp_dist[j][1] == k)
                {
                    temp_vect[tt] = temp_dist[j][0];
                    tt++;
                }
            }
            
            //Sort the vector in ascending order, the first x elemens contain the x smallest distances in the industry
            std::sort(temp_vect.begin(), temp_vect.end());
            
            avg_mindist[k] = 0.0;
            
            for (int j = 0; j < smoothing; j++)
            {
                if (temp_vect[j] != 99999.99)
                {
                    avg_mindist[k] += temp_vect[j];
                }
                else 
                {
                    skip_flag = 1;
                }
                
            }
            
            //Don't try to compute anything for sectors with zero firms (which will screw up things)
            if (num_firms_naics6_back > 0)
            {
                
                if (skip_flag == 0)
                {
                    if (skip_num == 0)
                    {
                        avg_mindist[k] /= smoothing;
                    }
                    else
                    {
                        avg_mindist[k] /= num_firms_naics6_back;
                    }
                    
                }
                else
                {
                    if (skip_num == 1)
                    {
                        if (num_firms_naics6_back > 1)
                        {
                            avg_mindist[k] /= (num_firms_naics6_back-1);
                        }
                        else
                        {
                            avg_mindist[k] /= num_firms_naics6_back;
                        }
                        
                    }
                    else
                    {
                        if (smoothing > 1)
                        {
                            avg_mindist[k] /= (smoothing-1);
                        }
                        else
                        {
                            avg_mindist[k] /= smoothing;
                        }
                    }
                }
            }
            
        }
        
        //Compute the input and the output linkage measure;
        
        for (int k = 0; k < 259; k++)
        {
            //Firms in own sector are de facto excluded since the weight is equal to zero
            outspace[i][1] += (iomatrix_I[k][firm_naics] * avg_mindist[k]);
            outspace[i][2] += (iomatrix_O[firm_naics][k] * avg_mindist[k]);
        }
        
        fprintf(ofile, "%lf %lf %lf %lf %lf\n", geospace[i][0], geospace[i][3], geospace[i][5], outspace[i][1], outspace[i][2]);
        
        if (i%100 == 0)
        {
            printf("Iteration %ld done\n", i);
        }
    }
    
    fclose(ofile);
    
    
    //Cleanup step: Free the dynamically allocated memory
    
    mindist = free_large_array(259, mindist);
    geospace = free_large_array(geosize, geospace);
    
    for (long i = 0; i < 259; i++)
    {
        free(industry_map[i]);
    }
    
    free(industry_map);
    
    finish = clock();
    
    printf("%s: Elapsed time = %lud seconds\n", argv[0], (finish-start)/CLOCKS_PER_SEC);
    
    return(0);
}
